Personal Computer World 2008 February

home *** CD-ROM | disk | FTP | other *** search

/ Personal Computer World 2008 February / PCWFEB08.iso / Software / Freeware / Miro 1.0 / Miro_Installer.exe / xulrunner / python / adscraper.py < prev next >

Wrap

Python Source | 2007-11-12 | 2.7 KB | 74 lines

# Miro - an RSS based video player application # Copyright (C) 2005-2007 Participatory Culture Foundation # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA import re # ============================================================================= # Purify a feed item data (usually its description) from ads. Returns the data # untouched if no ad were found. def purify(data): return _process(data, 'purify', data) # Scrape ads from a feed item data (usually its description). Returns an empty # string if no ad were found. def scrape(data): return _process(data, 'scrape', '') # ============================================================================= def _process(data, fkey, default): if data is None: return '' processed = None for funcs in FUNCS: process = funcs[fkey] processed = process(data) if processed is not None: break if processed is None: processed = default return processed # ============================================================================= FEEDBURNER_AD_PATTERN = re.compile(""" <p> # <p> <a\shref="http://feeds\.feedburner\.com/~a/[^"]*"> # <a href="..."> <img\ssrc="http://feeds\.feedburner\.com/~a/[^"]*"\sborder="0"> # <img src="..." border="0"> </img> # </img> </a> # </a> </p> # </p> """, re.VERBOSE) def _tryPurifyingFeedBurner(data): if FEEDBURNER_AD_PATTERN.search(data): return FEEDBURNER_AD_PATTERN.sub('', data) return None def _tryScrapingFeedBurner(data): match = FEEDBURNER_AD_PATTERN.search(data) if match is not None: return match.group(0) return None # ============================================================================= FUNCS = [ {'purify': _tryPurifyingFeedBurner, 'scrape': _tryScrapingFeedBurner} ]